{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Notebook for Estimations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook reproduces the the regression results for the paper \"The impact of carbon prices on renewable energy support\" by Jan Abrell and Mirjam Kosch. Running the notebook performs all regressions including the robustness exercises and saves results in the directory \"../results/\". \n",
    "\n",
    "mk, 16.7.2021"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Packages and Options"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import statsmodels.api as sm\n",
    "from statsmodels.iolib.summary2 import summary_col"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "dir_in = \"../data/\"\n",
    "dir_out = \"../results/table_coefficients/\"\n",
    "dir_cov = \"../results/cov_for_SE/\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Choose years for estimation**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "year_start = \"2015\"\n",
    "year_end = \"2016\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Choose coal-to-gas price ratio that marks switching point**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "r_bar = 0.74"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Define variables to be used in regression**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "var_all = [\"wind\", \"solar\", \"dem\", \"kgas\", \"kcoal\", \"t\", \"phi\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Technologies\n",
    "techs = [\"fpn_coal\", \"fpn_ccgt\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load and prepare data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Source data with all information\n",
    "fn_emi  = dir_in+\"generation_aggregated.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_emi = pd.read_csv(fn_emi, parse_dates=True, index_col=\"date\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define demand as sum of fossil and RE generation\n",
    "df_emi[\"demand\"] = df_emi.solar+df_emi.wind+df_emi.fpn_ccgt+df_emi.fpn_coal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_in = df_emi[year_start:year_end]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Currently only use data up to 29.12.2016"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_in = df_in[\"2015-01-01\":\"2016-12-29\"].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>fpn_ccgt</th>\n",
       "      <th>fpn_coal</th>\n",
       "      <th>emi_ccgt</th>\n",
       "      <th>emi_coal</th>\n",
       "      <th>mel_ccgt</th>\n",
       "      <th>mel_coal</th>\n",
       "      <th>melold_ccgt</th>\n",
       "      <th>melold_coal</th>\n",
       "      <th>emi</th>\n",
       "      <th>w_temperature_mean</th>\n",
       "      <th>...</th>\n",
       "      <th>wind</th>\n",
       "      <th>solar</th>\n",
       "      <th>dem_fos</th>\n",
       "      <th>dem_fos_ren</th>\n",
       "      <th>res</th>\n",
       "      <th>demand</th>\n",
       "      <th>w_wind</th>\n",
       "      <th>w_solar</th>\n",
       "      <th>w_demand</th>\n",
       "      <th>regime</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-01-01 00:00:00</th>\n",
       "      <td>2405.016667</td>\n",
       "      <td>8397.0</td>\n",
       "      <td>929.020834</td>\n",
       "      <td>7310.836949</td>\n",
       "      <td>2407.150000</td>\n",
       "      <td>8927.000000</td>\n",
       "      <td>3462.5</td>\n",
       "      <td>9237.00</td>\n",
       "      <td>8239.857782</td>\n",
       "      <td>8.485714</td>\n",
       "      <td>...</td>\n",
       "      <td>6989.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10802.016667</td>\n",
       "      <td>17791.516667</td>\n",
       "      <td>6989.5</td>\n",
       "      <td>17791.516667</td>\n",
       "      <td>0.000111</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000046</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-01-01 01:00:00</th>\n",
       "      <td>2404.016667</td>\n",
       "      <td>8114.5</td>\n",
       "      <td>928.562505</td>\n",
       "      <td>7069.382734</td>\n",
       "      <td>21984.466667</td>\n",
       "      <td>13416.333333</td>\n",
       "      <td>20094.4</td>\n",
       "      <td>13756.75</td>\n",
       "      <td>7997.945239</td>\n",
       "      <td>8.485714</td>\n",
       "      <td>...</td>\n",
       "      <td>6971.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10518.516667</td>\n",
       "      <td>17489.516667</td>\n",
       "      <td>6971.0</td>\n",
       "      <td>17489.516667</td>\n",
       "      <td>0.000111</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000045</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                        fpn_ccgt  fpn_coal    emi_ccgt     emi_coal  \\\n",
       "date                                                                  \n",
       "2015-01-01 00:00:00  2405.016667    8397.0  929.020834  7310.836949   \n",
       "2015-01-01 01:00:00  2404.016667    8114.5  928.562505  7069.382734   \n",
       "\n",
       "                         mel_ccgt      mel_coal  melold_ccgt  melold_coal  \\\n",
       "date                                                                        \n",
       "2015-01-01 00:00:00   2407.150000   8927.000000       3462.5      9237.00   \n",
       "2015-01-01 01:00:00  21984.466667  13416.333333      20094.4     13756.75   \n",
       "\n",
       "                             emi  w_temperature_mean  ...    wind  solar  \\\n",
       "date                                                  ...                  \n",
       "2015-01-01 00:00:00  8239.857782            8.485714  ...  6989.5    0.0   \n",
       "2015-01-01 01:00:00  7997.945239            8.485714  ...  6971.0    0.0   \n",
       "\n",
       "                          dem_fos   dem_fos_ren     res        demand  \\\n",
       "date                                                                    \n",
       "2015-01-01 00:00:00  10802.016667  17791.516667  6989.5  17791.516667   \n",
       "2015-01-01 01:00:00  10518.516667  17489.516667  6971.0  17489.516667   \n",
       "\n",
       "                       w_wind  w_solar  w_demand  regime  \n",
       "date                                                      \n",
       "2015-01-01 00:00:00  0.000111      0.0  0.000046     low  \n",
       "2015-01-01 01:00:00  0.000111      0.0  0.000045     low  \n",
       "\n",
       "[2 rows x 28 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_in.head(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare data for regression"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Dataframe with fossil generation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_tech = df_in[techs]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Add indicator to later define regime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_in[\"phi_low\"] = 0\n",
    "df_in[\"phi_high\"] = 0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Add Dummies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Month of year dummies\n",
    "df_ = df_in.reset_index()\n",
    "df_[\"mofy\"] = df_.date.map(lambda x: (x.year-int(year_start))*12 + x.month)\n",
    "df_ = pd.get_dummies(df_,columns=[\"mofy\"])\n",
    "df_in = df_.set_index(\"date\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hour and month dummies\n",
    "df_in2 = df_in.copy()\n",
    "df_in2 = df_in2.reset_index()\n",
    "df_in2[\"hour\"] = df_in2[\"date\"].dt.hour\n",
    "df_in2[\"month\"] = df_in2[\"date\"].dt.month\n",
    "df_in2 = pd.get_dummies(df_in2, columns=['hour','month'])\n",
    "df_in  = df_in2.set_index(\"date\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns with dummies\n",
    "col_month = [c for c in df_in.columns if c.startswith(\"month\")]\n",
    "col_mofy = [c for c in df_in.columns if c.startswith(\"mofy\")]\n",
    "col_hour = [c for c in df_in.columns if c.startswith(\"hour\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Prepare smaller data frame with only variables used for regression and rename columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>wind</th>\n",
       "      <th>solar</th>\n",
       "      <th>dem</th>\n",
       "      <th>kgas</th>\n",
       "      <th>kcoal</th>\n",
       "      <th>t</th>\n",
       "      <th>phi</th>\n",
       "      <th>const</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-01-01 00:00:00</th>\n",
       "      <td>6989.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17791.516667</td>\n",
       "      <td>2407.150000</td>\n",
       "      <td>8927.000000</td>\n",
       "      <td>8.485714</td>\n",
       "      <td>0.58942</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-01-01 01:00:00</th>\n",
       "      <td>6971.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17489.516667</td>\n",
       "      <td>21984.466667</td>\n",
       "      <td>13416.333333</td>\n",
       "      <td>8.485714</td>\n",
       "      <td>0.58942</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       wind  solar           dem          kgas         kcoal  \\\n",
       "date                                                                           \n",
       "2015-01-01 00:00:00  6989.5    0.0  17791.516667   2407.150000   8927.000000   \n",
       "2015-01-01 01:00:00  6971.0    0.0  17489.516667  21984.466667  13416.333333   \n",
       "\n",
       "                            t      phi  const  \n",
       "date                                           \n",
       "2015-01-01 00:00:00  8.485714  0.58942      1  \n",
       "2015-01-01 01:00:00  8.485714  0.58942      1  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_short = df_in.copy()\n",
    "df_short = df_short.rename(columns = {\"wind\":\"wind\", \"solar\":\"solar\", \"mel_ccgt\":\"kgas\", \n",
    "                      \"mel_coal\": \"kcoal\", \"w_temperature_mean\":\"t\", \"ratio_carb\":\"phi\", \"demand\":\"dem\" })\n",
    "df_short = df_short[var_all]\n",
    "\n",
    "# Add constant\n",
    "df_short[\"const\"] = 1\n",
    "df_short.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create dataframe with percentiles\n",
    "df_perc = df_short.copy()\n",
    "\n",
    "# Add total RE generation\n",
    "df_perc[\"RE\"] = df_perc.wind + df_perc.solar\n",
    "\n",
    "# Add percentiles for demand and RE production\n",
    "nperc = 5\n",
    "df_perc[\"demperc\"] = pd.qcut(df_perc.dem, nperc, labels=False)\n",
    "df_perc[\"REperc\"] = pd.qcut(df_perc.RE, nperc, labels=False)\n",
    "\n",
    "# Create dummies\n",
    "df_perc = pd.get_dummies(df_perc,columns=[\"demperc\",\"REperc\"])\n",
    "col_demperc = [c for c in df_perc.columns if c.startswith(\"demperc\")]\n",
    "col_REperc = [c for c in df_perc.columns if c.startswith(\"REperc\")]\n",
    "\n",
    "# Dataframes with RE- and demand-percentiles, respecitively\n",
    "df_demperc = df_perc[col_demperc]\n",
    "df_REperc = df_perc[col_REperc]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_poly(df, var_poly):\n",
    "    \"\"\"function returns dataframe with higher order polynomials\n",
    "    df: <pd.DataFrame> with variables\n",
    "    var_poly: variables\n",
    "    return: <pd.DataFrame> with second and third order polynomials\"\"\"\n",
    "\n",
    "    df_ = df[var_poly].copy()\n",
    "    for v in var_poly:\n",
    "        # Second and third order polynomial\n",
    "        df_[\"%s_%s\" %(v,v)] = df_[\"%s\" %v]*df_[\"%s\" %v] \n",
    "        df_[\"%s_%s_%s\" %(v,v,v)] = df_[\"%s\" %v]*df_[\"%s\" %v]*df_[\"%s\" %v] \n",
    "    df_ = df_.drop(var_poly, axis=1)\n",
    "        \n",
    "    return df_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_lags(df, var_lag, num_var):\n",
    "    \"\"\"function returns dataframe with lagged variables\n",
    "    df: <pd.DataFrame> with variables to lag\n",
    "    num_lag: number of lags\n",
    "    lag_var: variables to lag\n",
    "    return: <pd.DataFrame> with lagged variables\"\"\"\n",
    "    \n",
    "    df_ = df[var_lag].copy()\n",
    "    for v in var_lag:\n",
    "        df_ = df_.join(pd.DataFrame({v + \"_lag\" + str(lag): df_[v].shift(lag) for lag in range(-num_lag,0)}))\n",
    "\n",
    "    df_ = df_.join(pd.DataFrame({\"fpn_ccgt\" + \"_lag\" + str(lag): df_in[\"fpn_ccgt\"].shift(lag) for lag in range(-num_lag,0)}))\n",
    "    df_ = df_.join(pd.DataFrame({\"fpn_coal\" + \"_lag\" + str(lag): df_in[\"fpn_coal\"].shift(lag) for lag in range(-num_lag,0)}))\n",
    "\n",
    "    df_ = df_.drop(var_lag, axis=1)\n",
    "   \n",
    "    return df_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_regimes(df_in_, df, r_bar, name):\n",
    "    \"\"\"function adds interacts indicator for fuel switch regime (0/1) and regressors\n",
    "    df: <pd.DataFrame> with regressors\n",
    "    return: <pd.DataFrame> with indicator for low/high regime\"\"\"\n",
    "    \n",
    "    df_in_[\"phi_low\"] = 0\n",
    "    df_in_[\"phi_high\"] = 0\n",
    "    df_in_.loc[(df_in_.ratio_carb < r_bar), \"phi_low\"] = 1\n",
    "    df_in_.loc[(df_in_.ratio_carb >= r_bar), \"phi_high\"] = 1\n",
    "    \n",
    "    df_low  = df.multiply(df_in_[\"phi_low\"], axis=\"index\")\n",
    "    rename_cols_low = {i: (\"low_\"+i) for i in df_low.columns}\n",
    "    df_low = df_low.rename(columns=rename_cols_low)\n",
    "\n",
    "    df_high = df.multiply(df_in_[\"phi_high\"], axis=\"index\")\n",
    "    rename_cols_high = {i: (\"high_\"+i) for i in df_high.columns}\n",
    "    df_high = df_high.rename(columns=rename_cols_high)\n",
    "    \n",
    "    df_ = pd.concat([df_low, df_high], axis=1)\n",
    "    \n",
    "#    df_.to_csv(dir_out+\"df_reg_%s.csv\" %name)\n",
    "    \n",
    "    return df_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_results(df_reg, df_t, name):\n",
    "    \"\"\"regressions\n",
    "    param: <pd.DataFrame> with regressors\n",
    "    return: <pd.DataFrame> with regression results\"\"\"\n",
    "\n",
    "    lst_results = []\n",
    "    allresults  = {}\n",
    "    \n",
    "    for tech in techs:\n",
    "        \n",
    "        model   = sm.OLS(df_t[tech], df_reg)\n",
    "        results = model.fit(cov_type='HC3')\n",
    "        allresults.update({tech: results})\n",
    "        \n",
    "        df_results = pd.DataFrame(data=results.params,columns=[\"coef\"])\n",
    "        df_results[\"std\"]    = results.bse\n",
    "        df_results[\"pvalue\"] = results.pvalues\n",
    "        df_results[\"tech\"]   = tech\n",
    "        df_results[\"Model\"]  = name\n",
    "        \n",
    "        # Save covariance matrix\n",
    "        results.cov_params().reset_index().to_csv(dir_cov + \"%s_%s_cov.csv\" % (name, tech), index=False)\n",
    "    \n",
    "        lst_results.append(df_results)\n",
    "        \n",
    "    df_results = pd.concat(lst_results)\n",
    " \n",
    "    return df_results, allresults"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Interact percentiles with selected variables\n",
    "def get_perc_interaction(v, df_perc):\n",
    "    mux = pd.MultiIndex.from_product([df_short[v].columns, df_perc.columns])\n",
    "    df_short_ = df_short[v].reindex(mux, axis=1, level=0)\n",
    "    df_perc_ = df_perc.reindex(mux, axis=1, level=1)\n",
    "    df_int = df_short_.mul(df_perc_, axis=0)\n",
    "    df_int.columns = df_int.columns.map(\"_\".join)\n",
    "    return df_int"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Estimations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Without lags"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Linear"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "name = \"linear\"\n",
    "df_vars = df_short.copy()\n",
    "# Add indicators for regime\n",
    "df_reg_ = get_regimes(df_in, df_vars, r_bar, name)\n",
    "df_linear, res_linear   = get_results(df_reg_, df_tech, name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Specification with polynomials and interaction terms"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create dataframe with regression variables (including polynomials and interaction terms)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>wind</th>\n",
       "      <th>solar</th>\n",
       "      <th>dem</th>\n",
       "      <th>kgas</th>\n",
       "      <th>kcoal</th>\n",
       "      <th>t</th>\n",
       "      <th>phi</th>\n",
       "      <th>const</th>\n",
       "      <th>dem_dem</th>\n",
       "      <th>dem_dem_dem</th>\n",
       "      <th>wind_wind</th>\n",
       "      <th>wind_wind_wind</th>\n",
       "      <th>solar_solar</th>\n",
       "      <th>solar_solar_solar</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-01-01 00:00:00</th>\n",
       "      <td>6989.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17791.516667</td>\n",
       "      <td>2407.150000</td>\n",
       "      <td>8927.000000</td>\n",
       "      <td>8.485714</td>\n",
       "      <td>0.58942</td>\n",
       "      <td>1</td>\n",
       "      <td>3.165381e+08</td>\n",
       "      <td>5.631692e+12</td>\n",
       "      <td>48853110.25</td>\n",
       "      <td>3.414588e+11</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-01-01 01:00:00</th>\n",
       "      <td>6971.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17489.516667</td>\n",
       "      <td>21984.466667</td>\n",
       "      <td>13416.333333</td>\n",
       "      <td>8.485714</td>\n",
       "      <td>0.58942</td>\n",
       "      <td>1</td>\n",
       "      <td>3.058832e+08</td>\n",
       "      <td>5.349749e+12</td>\n",
       "      <td>48594841.00</td>\n",
       "      <td>3.387546e+11</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       wind  solar           dem          kgas         kcoal  \\\n",
       "date                                                                           \n",
       "2015-01-01 00:00:00  6989.5    0.0  17791.516667   2407.150000   8927.000000   \n",
       "2015-01-01 01:00:00  6971.0    0.0  17489.516667  21984.466667  13416.333333   \n",
       "\n",
       "                            t      phi  const       dem_dem   dem_dem_dem  \\\n",
       "date                                                                        \n",
       "2015-01-01 00:00:00  8.485714  0.58942      1  3.165381e+08  5.631692e+12   \n",
       "2015-01-01 01:00:00  8.485714  0.58942      1  3.058832e+08  5.349749e+12   \n",
       "\n",
       "                       wind_wind  wind_wind_wind  solar_solar  \\\n",
       "date                                                            \n",
       "2015-01-01 00:00:00  48853110.25    3.414588e+11          0.0   \n",
       "2015-01-01 01:00:00  48594841.00    3.387546e+11          0.0   \n",
       "\n",
       "                     solar_solar_solar  \n",
       "date                                    \n",
       "2015-01-01 00:00:00                0.0  \n",
       "2015-01-01 01:00:00                0.0  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_orig = df_short.copy()\n",
    "\n",
    "var_poly = [\"dem\",\"wind\",\"solar\"]\n",
    "df_poly = get_poly(df_short, var_poly)\n",
    "\n",
    "# Combine all dataframes that are needed for a regression\n",
    "df_orig2 = pd.concat([df_orig, df_poly], axis=1)\n",
    "\n",
    "df_orig2.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "name = \"poly\"\n",
    "df_vars = df_orig2\n",
    "# Add indicators for regime\n",
    "df_reg_ = get_regimes(df_in, df_vars, r_bar, name)\n",
    "df_poly, res_poly   = get_results(df_reg_, df_tech, name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Specifications with lags"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Linear model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define number of lags\n",
    "num_lag = 12"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Linear"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "name = \"linear_%s\" %num_lag\n",
    "\n",
    "# Choose variables to lag\n",
    "var_lag = df_short.columns #df_orig2.columns\n",
    "df_lag = get_lags(df_short, var_lag, num_lag)\n",
    "\n",
    "# Combine all dataframes that are needed for a regression\n",
    "df_vars = pd.concat([df_short, df_lag], axis=1) #df_orig2 , df_in[col_hour] ##df_lag\n",
    "\n",
    "df_vars = df_vars[:-num_lag].copy()\n",
    "df_tech2 = df_tech[:-num_lag].copy()\n",
    "df_in2   = df_in[:-num_lag].copy()\n",
    "\n",
    "# Add indicators for regime\n",
    "df_regg = get_regimes(df_in2, df_vars, r_bar, name)\n",
    "df_linear_12, res_linear_12  = get_results(df_regg, df_tech2, name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Hourly FE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "name = \"hourFE_%s\" %num_lag\n",
    "\n",
    "# Add dummies\n",
    "df_reg_ = pd.concat([df_regg, df_in2[col_hour]], axis=1)\n",
    "# Regression\n",
    "df_hourFE, res_hourFE = get_results(df_reg_, df_tech2, name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Month FE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "name = \"monthFE_%s\" %num_lag\n",
    "\n",
    "# Add dummies\n",
    "df_reg_ = pd.concat([df_regg, df_in2[col_month]], axis=1)\n",
    "# Regression\n",
    "df_monthFE, res_monthFE     = get_results(df_reg_, df_tech2, name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Month-of-sample FE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "name = \"mofyFE_%s\" %num_lag\n",
    "\n",
    "# Add dummies\n",
    "df_reg_ = pd.concat([df_regg, df_in2[col_mofy]], axis=1)\n",
    "# Regression\n",
    "df_mofyFE, res_mofyFE     = get_results(df_reg_, df_tech2, name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Robustness: Alternative Thresholds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Phi = 0.7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "name = \"phi_07_%s\" %num_lag\n",
    "r_bar_temp = 0.7\n",
    "\n",
    "# Add indicators for regime\n",
    "df_regg = get_regimes(df_in2, df_vars, r_bar_temp, name)\n",
    "df_phi_07, res_phi_07   = get_results(df_regg, df_tech2, name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Phi = 0.8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "name = \"phi_08_%s\" %num_lag\n",
    "r_bar_temp = 0.8\n",
    "\n",
    "# Add indicators for regime\n",
    "df_regg = get_regimes(df_in2, df_vars, r_bar_temp, name)\n",
    "df_phi_08, res_phi_08 = get_results(df_regg, df_tech2, name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Robustness: Semi-Parametric"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Renewable percentiles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Choose variables for interaction and create dataframe without these variables\n",
    "v = [\"wind\", \"solar\", \"dem\", \"const\"]\n",
    "df_short_ = df_short.drop(columns=v, axis=1)\n",
    "\n",
    "# Interact percentiles with selected variables\n",
    "df_demperc_ = get_perc_interaction(v, df_demperc)\n",
    "df_REperc_ = get_perc_interaction(v, df_REperc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "name = \"REperc_%s\" %num_lag\n",
    "\n",
    "# Combine all dataframes that are needed for a regression\n",
    "df_vars = pd.concat([df_short_, df_REperc_, df_lag], axis=1)\n",
    "\n",
    "df_vars = df_vars[:-num_lag].copy()\n",
    "\n",
    "# Add indicators for redf_REperc_gime\n",
    "df_regg = get_regimes(df_in2, df_vars, r_bar, name)\n",
    "# Regression\n",
    "df_REperc, res_REperc = get_results(df_regg, df_tech2, name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Semi-parametric: Demand percentiles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "name = \"demperc_%s\" %num_lag\n",
    "\n",
    "# Combine all dataframes that are needed for a regression\n",
    "df_vars = pd.concat([df_short_, df_demperc_, df_lag], axis=1)\n",
    "\n",
    "df_vars = df_vars[:-num_lag].copy()\n",
    "\n",
    "# Add indicators for regime\n",
    "df_regg = get_regimes(df_in2, df_vars, r_bar, name)\n",
    "# Regression\n",
    "df_demperc, res_demperc = get_results(df_regg, df_tech2, name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Polynomials and interaction terms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "name = \"poly_%s\" %num_lag\n",
    "\n",
    "# Choose variables to lag\n",
    "var_lag = df_orig2.columns #df_orig2.columns\n",
    "df_lag = get_lags(df_orig2, var_lag, num_lag)\n",
    "\n",
    "# Combine all dataframes that are needed for a regression\n",
    "df_vars = pd.concat([df_orig2, df_lag], axis=1)\n",
    "\n",
    "df_vars = df_vars[:-num_lag].copy()\n",
    "\n",
    "# Add indicators for regime\n",
    "df_regg = get_regimes(df_in2, df_vars, r_bar, name)\n",
    "#df_reg_ = pd.concat([df_reg_, df_in2[col_hour]], axis=1)\n",
    "df_poly_12, res_poly_12  = get_results(df_regg, df_tech2, name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Export Results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save dataframe with coefficients, standard errors, etc."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_results = pd.concat([df_linear, df_linear_12, df_poly, df_poly_12,\n",
    "                        df_hourFE, df_mofyFE, df_monthFE, \n",
    "                        df_phi_07, df_phi_08,\n",
    "                        df_demperc, df_REperc])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_results = df_results.reset_index()\n",
    "df_results = df_results.rename(columns={\"index\":\"variable\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_results.to_csv(dir_out + \"df_coefs_lags12.csv\", index=False)\n",
    "df_results.to_excel(dir_out + \"df_coefs_lags12.xlsx\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
